Better data than mine:

https://github.com/favstats/USElection2020-EdisonResearch-Results/blob/main/data/latest/presidential.csv

Load and merge:

url = "https://github.com/favstats/USElection2020-EdisonResearch-Results/raw/main/data/latest/presidential.csv"
results2020 = read.csv(url)
megafile = read.table("eday-covid.txt", header = TRUE)
library(dplyr)
results = left_join(results2020, megafile, by = "fips")

Add swings:

results$Swing2020 = results$margin2020 - results$margin2016
results$Swing2016 = results$margin2016 - results$margin2012

Some quick maps

Easy map:

# Needs a fips variable
library(usmap)
plot_usmap(regions = "counties", include = "IN",
  data = results, values = "margin2020") +
  scale_fill_gradient2(low = "darkblue", mid = "darkorchid",
    high = "red", name = "Trump margin") +
  theme(legend.position = "right")

County map:

us_states = map_data("state")
county_map$id = as.numeric(county_map$id)
# joining with election results 
county.election.df = left_join(county_map, results, by = c("id" = "fips"))
#county.election.df = drop_na(county.election.df)
ggplot(county.election.df, aes(x = long, y = lat, fill = margin2020, group = group)) + geom_polygon(color = "gray90", size = 0.25) + theme_map() + labs(fill = "Trump margin") + scale_fill_gradient2(low = "blue", mid = "darkorchid", high = "red")

Nerf the color scale:

nerf = county.election.df
nerf$margin2020[nerf$margin2020 > 25] = 25
nerf$margin2020[nerf$margin2020 < -25] = -25
ggplot(nerf, aes(x = long, y = lat, fill = margin2020, group = group)) + geom_polygon(color = "gray90", size = 0.25) + theme_map() + labs(fill = "Trump margin") + scale_fill_gradient2(low = "blue", mid = "darkorchid", high = "red")

nerfswing = county.election.df
nerfswing$Swing2020[nerfswing$Swing2020 > 10] = 10
nerfswing$Swing2020[nerfswing$Swing2020 < -10] = -10
ggplot(nerfswing, aes(x = long, y = lat, fill = Swing2020, group = group)) + geom_polygon(color = "gray90", size = 0.25) + theme_map() + scale_fill_gradient2(low = "blue", mid = "darkorchid", high = "red")

Scatterplots

2020 swing vs. 2016 swing:

results %>%
  ggplot(aes(Swing2016, Swing2020)) + 
  geom_smooth(method = "lm") +
  geom_point()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).

  #geom_point(data = filter(compresults, votes >= 50000))

2020 swing vs. college education:

results %>%
  ggplot(aes(college, Swing2020)) + geom_smooth(se = FALSE) +
  geom_point(alpha = 0.1) +
  scale_x_log10() +
  xlab("Percent with a college degree (log scale)") +
  ylab("Swing (positive means Trump did better in 2020)") +
  ggtitle("Education polarization increased again in 2020") +
  labs(subtitle = "Trump improved in low education counties, did worse in high education counties")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 61 rows containing non-finite values (stat_smooth).
## Warning: Removed 61 rows containing missing values (geom_point).

Split by region:

results %>%
  filter(census_region != "NA") %>%
  ggplot(aes(college, Swing2020)) + geom_smooth(se = FALSE) +
  geom_point(alpha = 0.1) +
  scale_x_log10() +
  xlab("Percent with a college degree (log scale)") +
  ylab("Swing (positive means Trump did better in 2020)") +
  ggtitle("Education polarization increased again in 2020") +
  labs(subtitle = "Trump improved in low education counties, did worse in high education counties") +
  facet_wrap(~census_region)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

How does this compare to 2016?

results %>%
  ggplot(aes(college, Swing2016)) + geom_smooth(se = FALSE) +
  geom_point(alpha = 0.3) +
  scale_x_log10()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 61 rows containing non-finite values (stat_smooth).
## Warning: Removed 61 rows containing missing values (geom_point).

Swing by Hispanic percentage:

results$stateA = recode_factor(results$state.x, Florida = "Florida", Texas = "Texas", .default = "Everywhere else")
results %>%
  filter(stateA != "NA") %>%
  ggplot(aes(hisp_pct * 100, Swing2020)) + geom_point(alpha = 0.3) +
  geom_smooth(se = FALSE) +
#  scale_x_log10() +
  xlim(10, 100) +
  facet_wrap(~ stateA) +
  xlab("Hispanic percentage") +
  ylab("Swing (positive means Trump did better in 2020)") +
  ggtitle("Swing in counties with at least 10% Hispanic population")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2328 rows containing non-finite values (stat_smooth).
## Warning: Removed 2328 rows containing missing values (geom_point).

Swing by COVID deaths per capita:

results %>%
  ggplot(aes(deaths/popestimate2019, Swing2020)) +
  geom_point() +
  #geom_smooth(method = "lm") +
  geom_smooth(method = "gam", color = "orange") +
  scale_x_log10()
## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 351 rows containing non-finite values (stat_smooth).
## Warning: Removed 61 rows containing missing values (geom_point).

Regression

Was 538 right?

display(lm(margin2020 ~ margin2012 + margin2016, data = results))
## lm(formula = margin2020 ~ margin2012 + margin2016, data = results)
##             coef.est coef.se
## (Intercept) -1.51     0.15  
## margin2012  -0.10     0.01  
## margin2016   1.12     0.01  
## ---
## n = 3110, k = 3
## residual sd = 5.19, R-Squared = 0.97

What’s the correlation of swing with (log) college education?

cor(log(results$college), results$Swing2020, use = "pairwise")
## [1] -0.5634693
display(lm(Swing2020 ~ log(college), data = results))
## lm(formula = Swing2020 ~ log(college), data = results)
##              coef.est coef.se
## (Intercept)  -12.11     0.33 
## log(college)  -7.59     0.20 
## ---
## n = 3098, k = 2
## residual sd = 4.42, R-Squared = 0.32

Is swing related to race? (Note that since the percentages are small and the data is aggregated, this may be misleading.)

display(lm(Swing2020 ~ black_pct + hisp_pct + asian_pct, data = results))
## lm(formula = Swing2020 ~ black_pct + hisp_pct + asian_pct, data = results)
##             coef.est coef.se
## (Intercept)  -0.17     0.13 
## black_pct     1.03     0.63 
## hisp_pct     10.95     0.66 
## asian_pct   -57.06     3.46 
## ---
## n = 3098, k = 4
## residual sd = 4.98, R-Squared = 0.13

Does the Hispanic result hold in e.g. Illinois?

results %>%
  filter(state.x == "Illinois") %>%
  ggplot(aes(hisp_pct, Swing2020)) + geom_text(aes(label = county))
## Warning: Removed 1 rows containing missing values (geom_text).

Is there a relationship with COVID rate?

display(lm(Swing2020 ~ log((cases+1)/popestimate2019), data = results))
## lm(formula = Swing2020 ~ log((cases + 1)/popestimate2019), data = results)
##                                  coef.est coef.se
## (Intercept)                      5.95     0.52   
## log((cases + 1)/popestimate2019) 1.55     0.14   
## ---
## n = 3098, k = 2
## residual sd = 5.25, R-Squared = 0.04

Is there a relationship with COVID rate after accounting for college education?

display(lm(Swing2020 ~ log((deaths+1)/popestimate2019) + log(college), data = results))
## lm(formula = Swing2020 ~ log((deaths + 1)/popestimate2019) + 
##     log(college), data = results)
##                                   coef.est coef.se
## (Intercept)                       -8.82     0.82  
## log((deaths + 1)/popestimate2019)  0.39     0.09  
## log(college)                      -7.39     0.20  
## ---
## n = 3098, k = 3
## residual sd = 4.41, R-Squared = 0.32